#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
多线程爬虫执行器 - 轻量级并发方案
"""

import concurrent.futures
import time
import os
import threading
from scrape_restaurant_urls import scrape_restaurant_urls

def run_country_thread(country):
    """Scrape restaurant URLs for one country; meant to run as a pool task.

    Progress messages are printed with the current thread's name. The
    master file ``<country>_city_urls.csv`` (a relative path) must exist,
    otherwise the scraper is not invoked.

    Args:
        country: Country name. It is used to build the master-file name and
            is passed to ``scrape_restaurant_urls`` as
            ``data={"country": country}``.

    Returns:
        A one-line status string prefixed with the country name: missing
        file, success with the elapsed seconds, or failure with the error
        text. Any ``Exception`` raised while processing is reported in the
        returned string instead of being propagated.
    """
    worker = threading.current_thread().name
    print(f"🚀 [{worker}] 开始处理国家: {country}")
    began = time.time()

    try:
        csv_path = f"{country}_city_urls.csv"
        if os.path.exists(csv_path):
            # Master file is present: run the scraper for this country.
            scrape_restaurant_urls(data={"country": country})
            took = time.time() - began
            print(f"✅ [{worker}] 国家 {country} 处理完成，耗时: {took:.2f} 秒")
            return f"{country}: 成功 ({took:.2f}s)"

        # No master file for this country: report it and skip the scrape.
        print(f"❌ [{worker}] 错误: 找不到主控文件 '{csv_path}'。跳过国家 {country}。")
        return f"{country}: 文件不存在"
    except Exception as err:
        # Turn the failure into a status string so the caller can still
        # collect one result per country.
        took = time.time() - began
        print(f"❌ [{worker}] 国家 {country} 处理失败: {err}，耗时: {took:.2f} 秒")
        return f"{country}: 失败 - {err}"

def main(countries=None, max_workers=3):
    """Scrape several countries concurrently and print a result summary.

    Each country whose master file ``<country>_city_urls.csv`` exists is
    submitted to a thread pool as a ``run_country_thread`` task. Countries
    without a master file are reported and skipped. If no country has a
    master file, an error is printed and the function returns early.

    Args:
        countries: Iterable of country names to process. ``None`` (the
            default) selects the built-in list of three countries.
        max_workers: Maximum number of worker threads in the pool. The
            startup banner reports this same value.

    Returns:
        None. All output is printed to stdout.
    """
    print("=" * 60)
    print("🌍 多线程爬虫执行器启动")
    print("=" * 60)

    start_time = time.time()

    # Fall back to the built-in country list when the caller gives none.
    if countries is None:
        countries = ["印度尼西亚", "菲律宾", "马来西亚"]

    # Keep only the countries whose master file is present.
    available_countries = []
    for country in countries:
        if os.path.exists(f"{country}_city_urls.csv"):
            available_countries.append(country)
            print(f"📁 找到国家文件: {country}")
        else:
            print(f"⚠️  警告: 找不到国家文件: {country}，将跳过")

    if not available_countries:
        print("❌ 错误: 没有可用的国家文件，请检查文件是否存在")
        return

    print(f"\n🎯 准备多线程处理 {len(available_countries)} 个国家: {', '.join(available_countries)}")
    # Report the real pool size instead of a separately hard-coded number.
    print(f"🧵 最大线程数: {max_workers} (浏览器实例)")
    print("-" * 60)

    results = []
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max_workers, thread_name_prefix="Scraper"
    ) as executor:
        # Submit one task per country, remembering which future is which.
        future_to_country = {
            executor.submit(run_country_thread, country): country
            for country in available_countries
        }

        # Collect results in completion order, not submission order.
        for future in concurrent.futures.as_completed(future_to_country):
            country = future_to_country[future]
            try:
                results.append(future.result())
            except Exception as e:
                results.append(f"{country}: 异常 - {e}")

    # Print the per-country summary.
    print("\n" + "=" * 60)
    print("📊 多线程处理结果汇总")
    print("=" * 60)
    for result in results:
        print(f"   {result}")

    total_time = time.time() - start_time
    print(f"\n⏱️  总耗时: {total_time:.2f} 秒")
    print("🎉 所有国家多线程处理完毕！")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()